# -*- coding: utf-8 -*-
import json
import random

from scrapy import Request
from scrapy.utils.project import get_project_settings

# NOTE: the order of the imports below is kept as-is on purpose: the wildcard
# import may shadow names imported before it, and is shadowed by names
# imported after it.
from zc_core.dao.item_pool_dao import ItemPoolDao
from zc_core.spiders.base import BaseSpider
from cncecyc.rules import *
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter


class ItemPoolSpider(BaseSpider):
    """Spider that requests detail data for every pooled SKU lacking a ``tag``.

    For each record returned by ``ItemPoolDao.get_item_pool_list`` a JSON POST
    is sent to ``item_url``; the response is handed to the module-level
    ``parse_pool_data`` rule and the result is yielded as an item.
    """

    name = 'pool'
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        # 'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }

    # Commonly used URLs
    item_url = 'https://www.cncecyc.com/share-ecommerce/applyComm/findThirdShopDetails'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Register the batch and build the duplicate filter.

        :param batchNo: batch number forwarded to ``BaseSpider.__init__``;
            ``self.batch_no`` is presumably derived from it there
            (TODO confirm against ``BaseSpider``).
        """
        super(ItemPoolSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record
        BatchDao().create_batch(self.batch_no)
        # Avoid collecting the same item twice: the filter is built over
        # 'item_data_pool' documents that already have a 'tag' field.
        self.done_filter = DoneFilter(
            coll_name='item_data_pool',
            query={'tag': {'$exists': True}},
        )

    def start_requests(self):
        """Yield one POST request per pooled SKU that still needs collecting.

        SKUs reported as done by ``self.done_filter`` are skipped unless the
        project setting ``FORCE_RECOVER`` is truthy.
        """
        settings = get_project_settings()
        # The flag cannot change while iterating, so read it once up front
        # instead of once per SKU.
        force_recover = settings.get('FORCE_RECOVER', False)

        pool_list = ItemPoolDao().get_item_pool_list(
            fields={'supplierId': 1, 'supplierSkuId': 1, '_id': 1},
            query={'tag': {'$exists': False}},
        )
        self.logger.info('全量：%s', len(pool_list))
        # Visit the SKUs in random order.
        random.shuffle(pool_list)

        for sku in pool_list:
            sp_id = sku.get('supplierId')
            sp_sku_id = sku.get('supplierSkuId')
            sku_id = sku.get('_id')
            # Avoid collecting the same item twice
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: [%s]', sku_id)
                continue
            # Collect the item
            yield Request(
                url=self.item_url,
                method='POST',
                callback=self.parse_pool_data,
                errback=self.error_back,
                headers={
                    'Content-Type': 'application/json',
                },
                meta={
                    'reqType': 'item',
                    'skuId': sku_id,
                    'batchNo': self.batch_no,
                },
                body=json.dumps({
                    'sku': sp_sku_id,
                    'proCode': sp_id,
                }),
                priority=260,
            )

    # Handle ItemData
    def parse_pool_data(self, response):
        """Parse a detail response and yield the resulting item.

        :param response: response to a request issued by ``start_requests``.
        """
        # Inside a method body this name resolves to the module-level
        # ``parse_pool_data`` (presumably provided by the ``cncecyc.rules``
        # wildcard import), not to this method, so there is no recursion.
        data = parse_pool_data(response)
        self.logger.info('商品: [%s]', data.get('skuId'))
        yield data
